******************************************************************************************
* ONLINE APPENDIX FIGURE A7
*
* 1) ESTIMATE THE EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' WAGES BY INDUSTRY
*  
* LAST MODIFIED: JULY, 2025
******************************************************************************************

* START FROM AN EMPTY SESSION (DATA, MATRICES, STORED RESULTS)
clear all

* GRAPH SCHEMES: UNCOMMENT AND RUN ONCE IF THEY ARE NOT INSTALLED YET
*ssc install blindschemes, replace all 
*ssc install schemepack, replace /*based on https://github.com/asjadnaqvi/Stata-schemes*/

**# DIRECTORIES

* ROOT FOLDER OF THE PAPER (DROPBOX). EDIT THIS LINE TO RUN ON ANOTHER MACHINE
global fiscal "/Users/andres/Library/CloudStorage/Dropbox/2 Papers/2023/Paper_Fiscal_Costs"

* SUBFOLDERS, ALL DEFINED RELATIVE TO THE ROOT FOLDER
global results  "$fiscal/3 Results"
global programs "$fiscal/2 Programs/2 Figures and Tables paper"
global data     "$fiscal/1 Data/12 Final Datasets"

**# LOAD DATASET AND CREATE NEW VARIABLES

* READ THE ANALYSIS FILE FROM THE FINAL-DATASETS FOLDER
use "$data/LaborEffects.dta", clear

**# SAMPLE SELECTION

* ONLY METROPOLITAN AREAS: AREA CODES 81 AND ABOVE AND AREA CODE 25 HAVE NO
* OBSERVATIONS. MISSING AREA CODES COMPARE AS LARGER THAN ANY NUMBER, SO THE
* FIRST CONDITION REMOVES THEM AS WELL; THE KEEP BELOW MAKES THAT EXPLICIT
drop if area >= 81 | area == 25
keep if !missing(area)

* ONE INDICATOR PER YEAR (year1, year2, ...) AND PER MSA (area1, area2, ...)
tabulate year, generate(year)
tabulate area, generate(area)

* WORKING-AGE POPULATION ONLY: AGES 15 TO 64 (MISSING AGES ARE DROPPED TOO)
keep if inrange(age, 15, 64)

* OPTIONAL: KEEP IF NOT ENROLLED IN SCHOOL
*keep if att_school == 2 

* OPTIONAL: KEEP THOSE THAT WORK LESS THAN 100 HOURS A WEEK  
*drop if weekly_hours>100

* NO OBSERVATIONS AFTER 2018 (OBSERVATIONS WITH A MISSING YEAR ARE DROPPED TOO)
keep if year <= 2018

* SHOW THE SAMPLING WEIGHTS WITHOUT DECIMALS
format fex12 %12.0f

* INTEGER-VALUED COPY OF THE WEIGHTS FOR COMMANDS THAT REQUIRE THEM
generate fex12_round = fex12
replace fex12_round = round(fex12_round)

**# GLOBAL MACROS FOR VARIABLES

* REGRESSOR OF INTEREST AND ITS TWO INSTRUMENTS
global x        mig_share2013
global z1       IV2005
global z2       IVdist

* SAMPLING WEIGHT USED IN THE ESTIMATIONS
global w        fex12

* CONTROL SET: YEAR AND MSA INDICATORS, SEX, AGE, AGE SQUARED, EDUCATION INDICATORS
global controls year2-year6 area2-area23 i.sex1 age c.age#c.age educ2-educ5

**# EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' WAGES BY INDUSTRY
* NOTE: USUALLY THE SAMPLE IS RESTRICTED TO NATIVES IN MSAs WHO ARE 
* NOT ATTENDING SCHOOL AND NOT SELF-EMPLOYED. HOWEVER, MOST OF THE 
* PAPERS EXPLORING SIMILAR TOPICS FOR COLOMBIA DON'T DO SO.  

* KEEP IF NOT ENROLLED IN SCHOOL
*keep if att_school == 2 

* KEEP IF NOT SELF-EMPLOYED 
*keep if emp_type != 4

* KEEP ONLY SALARIED WORKERS
*keep if emp_type == 1

* KEEP THOSE THAT WORK LESS THAN 100 HOURS A WEEK  
*drop if weekly_hours>100

**## HANDLING OUTLIERS
* TRIM THE HOURLY REAL WAGE DISTRIBUTION YEAR BY YEAR: WITHIN EACH YEAR, DROP
* OBSERVATIONS BELOW THE 0.5TH OR ABOVE THE 99.5TH PERCENTILE OF THAT YEAR.
* THE DROP IS RESTRICTED TO THE YEAR THE CUT-OFFS WERE COMPUTED FOR, SO ONE
* YEAR'S THRESHOLDS NEVER TRIM ANOTHER YEAR'S OBSERVATIONS.
* MISSING WAGES COMPARE AS LARGER THAN ANY NUMBER AND ARE THEREFORE DROPPED TOO.
* THE CUT-OFFS ARE KEPT IN TEMPORARY SCALARS SO THEY ARE COMPARED AT FULL PRECISION.
tempname wage_lo wage_hi
levelsof year, local(years)
foreach yr of local years {
	display "this is year `yr'"
	centile hourly_real_wages if year == `yr', centile(0.5 99.5)
	return list
	scalar `wage_lo' = r(c_1)
	scalar `wage_hi' = r(c_2)
	display `wage_lo'
	display `wage_hi'
	* keep only the wage distribution between percentiles 0.5 and 99.5 of this year
	drop if year == `yr' & (hourly_real_wages > `wage_hi' | hourly_real_wages < `wage_lo')
}

* FOR EVERY INDUSTRY, ESTIMATE THE EFFECT OF $x ON LOG HOURLY REAL WAGES OF
* EMPLOYED NATIVES BY OLS AND BY 2SLS WITH EACH INSTRUMENT, AND STORE THE POINT
* ESTIMATE AND THE T-BASED 95 PERCENT CONFIDENCE INTERVAL IN A 3x3 MATRIX NAMED
* C FOLLOWED BY THE INDUSTRY CODE (ROWS: OLS IV1 IV2; COLUMNS: b ll ul)
levelsof industry, local(industry)
foreach j of local industry {

	display "This is industry `j'"

	matrix C`j' = J(3,3,.)
	matrix colnames C`j' = b ll ul
	matrix rownames C`j' = OLS IV1 IV2

	**## OLS
	ivreg2 log_hourly_real_wages $x $controls if emp == 1 & immig == 0 & area != . & industry == `j' [pw=$w], cluster(area)  partial(area2-area23) small
	matrix C`j'[1,1] = _b[$x]
	matrix C`j'[1,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
	matrix C`j'[1,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]

	**## IV1 (ENCLAVE INSTRUMENT)
	ivreg2 log_hourly_real_wages ($x = $z1) $controls if emp == 1 & immig == 0 & area != . & industry == `j' [pw=$w], first cluster(area)  partial(area2-area23) small
	matrix C`j'[2,1] = _b[$x]
	matrix C`j'[2,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
	matrix C`j'[2,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]

	**## IV2 (DISTANCE INSTRUMENT)
	ivreg2 log_hourly_real_wages ($x = $z2) $controls if emp == 1 & immig == 0 & area != . & industry == `j' [pw=$w], first cluster(area)  partial(area2-area23) small
	matrix C`j'[3,1] = _b[$x]
	matrix C`j'[3,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
	matrix C`j'[3,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]

	* no manual increment of j here: foreach assigns the next industry code itself
}

* PRINT THE STORED RESULTS FOR EVERY INDUSTRY THAT WAS ESTIMATED
foreach j of local industry {
	matrix list C`j'
}


**# PLOTTING EFFECTS

**## WAGES
*  T-BASED CIs
* ONE PANEL PER INDUSTRY, DRAWN FROM THE MATRIX C FOLLOWED BY THE INDUSTRY CODE
* (COLUMN 1 = POINT ESTIMATE, COLUMNS 2 AND 3 = CI BOUNDS). EVERY PANEL IS SAVED
* TO ITS OWN TEMPORARY FILE AND THE FILE NAME IS APPENDED TO THE LOCAL panels,
* SO THE COMBINED FIGURE ALWAYS CONTAINS EXACTLY THE INDUSTRIES PRESENT IN THE DATA
* NOTE(review): THE COEFFICIENT NAMES PRESUMABLY COME FROM THE MATRIX ROW NAMES
* (OLS IV1 IV2), SO THE coeflabel ENTRIES KEYED ON 1 r1, 2 r2, 3 r3 MAY NOT MATCH
* ANY COEFFICIENT - CONFIRM AGAINST THE RENDERED FIGURE BEFORE CHANGING THEM
local panels
levelsof industry, local(industry)
foreach j of local industry {
coefplot (matrix(C`j'[,1]), ci((C`j'[,2] C`j'[,3])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on log hourly wages industry `j'", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("A. Wages - industry `j'", size(medium)) fysize(50) fxsize(80)
		 tempfile resultsC`j'
		 graph save "`resultsC`j''"
		 local panels `"`panels' "`resultsC`j''""'
}	

**## FIGURE FOR THE ONLINE APPENDIX: WAGE EFFECTS BY INDUSTRY (T-BASED CIs)  
* play() APPLIES THE GRAPH RECORDING EffectWagesSector, WHICH MUST BE AVAILABLE TO STATA
graph combine `panels'  ///
, graphregion(color(white)) cols(4) rows(4) xcommon ycommon imargin(0 0 0 0)  ///
 iscale(0.72) l2title("Effect on log hourly wages") scheme(plottig) play(EffectWagesSector)
qui graph save "$results/1 Figures Paper/FigA7_1.gph", replace 
qui graph export "$results/1 Figures Paper/FigA7_1.eps", as(eps) replace		 

 
